/*******************************************************************************
  ARTICLE	GAY, GOBBI, GONI (2025) "REVOLUTIONARY TRANSITIONS. INHERITANCE    
            CHANGE AND FERTILITY DECLINE" JOURNAL OF POLITICAL ECONOMY         
                                                                               
  AUTHORS	VICTOR GAY, PAULA GOBBI, MARC GONI                                 
  CONTACT	victor.gay@tse-fr.eu; paula.eugenia.gobbi@ulb.be; marc.goni@uib.no 
  VERSION	1.0 (MAY 2025)                                                     
  SOFTWARE	STATA SE 18                                                        
  LICENCE	MIT                                                                
--------------------------------------------------------------------------------

CADASTRE DATA PREPARATION DO FILE

This file prepares cadastre data and generates the final cadastre dataset for analysis.

Instructions: 
-------------
	open do-files from directory where they are placed; order matters; run whole code.

Do-file structure: 
------------------
	1. HENRY MUNICIPALITIES INFORMATION
	2. CADASTRE DATASET

Main sources: 
-------------
	Cadastre archival sources (see readme)
		 
*/
********************************************************************************

* Interpret the file under Stata 18 rules, then start from an empty session
* with output paging disabled.
version 18
clear all
set more off

* Folder for intermediate files (relative path).
global TEMP "../2_0_tempfiles"

* Timer 1 measures the run time of the whole do-file; it is stopped and listed
* at the very end.
timer on 1

* ==============================================================================
* 1. HENRY MUNICIPALITIES INFORMATION
* ------------------------------------------------------------------------------

* Import the municipality-level sheet from a fixed cell range. No header row is
* read, so the columns arrive named A to M and are renamed one at a time, here
* and in the statements that follow.
import excel "../../1_raw_data/1_20_cadastre/cadastre.xlsx",                 ///
	sheet("cadastre_henry") cellrange(A4:M42) clear

rename A villagenum
label variable villagenum "Henry village identifier [Séguy 2001]"

rename B insee_com
label variable insee_com "INSEE identifier [COG 2011]"

rename C villagename
label variable villagename "Henry village name [Séguy 2001]"

/* cadastre year: system missing values are recoded to the extended missing
   value .a so that they can carry their own value label */
rename D cadastre_year 
label variable cadastre_year "Cadastre year [Cadastre]"
destring cadastre_year, replace
replace cadastre_year = .a if cadastre_year == .
label define cadastre_year_lbl .a "Unknown"
label values cadastre_year cadastre_year_lbl

/* inheritance indicators and raw soil texture (columns E to I) */
rename (E F G H I) (partible impart fem_incl fem_excl text1)
label variable partible "Indicator for partible inheritance"
label variable impart "Indicator for impartible inheritance"
label variable fem_incl "Indicator for women included in inheritance"
label variable fem_excl "Indicator for women excluded from inheritance"
label variable text1 "Soil texture [INRA 1998]"

/* soil texture categories: column J holds the category as text; it is turned
   into a labelled numeric code (1 to 4, in the order listed) and then dropped.
   Any other text in J leaves the code missing. */
generate text1_cat = ., after(J)
local code = 0
foreach cat in "Coarse" "Medium" "Medium fine" "Fine" {
	local ++code
	replace text1_cat = `code' if J == "`cat'"
}
label define text1_cat_lbl 1 "Coarse" 2 "Medium" 3 "Medium fine" 4 "Fine"
label values text1_cat text1_cat_lbl
label variable text1_cat "Soil texture, categories [INRA 1998]"
drop J

/* municipality area, population and density (columns K to M) */
rename (K L M) (area pop_1793 density_1793)
label variable area "Area (km2) [GEOFLA 2011]"
label variable pop_1793 "Population in 1793 [Perret et al. 2015]"
label variable density_1793 "Population density in 1793 (per km2) [GEOFLA 2011, Perret et al. 2015]"

/* flag the municipalities listed below as having cadastre data (1), all
   others 0; the new variable is placed right after cadastre_year */
* NOTE(review): sheets for GERMOND-ROUVRE are processed in section 2, but that
* village is not flagged here - confirm whether the omission is intended.
generate cadastre_data = inlist(villagename,                                 ///
	"CABRIS", "MAIZIERES", "ECHEVRONNE", "GUIMAEC", "GROZON",                ///
	"SAINT-ANDRE-EN-BRESSE", "VIDEIX", "CHAMPIGNY"), after(cadastre_year)
label variable cadastre_data "Indicator for cadastre data available"

/* store the municipality file for the merge in section 2 */
compress
save "$TEMP/cadastre_henry", replace
* ==============================================================================

* ==============================================================================
* 2. CADASTRE DATASET
* ------------------------------------------------------------------------------

/* Build the list of Excel sheets to process. Each sheet is named
   VILLAGE_SECTION; the list is assembled village by village from the section
   codes below (ECHEVERONNE is the spelling used in the sheet names). */
local villages CABRIS CHAMPIGNY ECHEVERONNE GERMOND-ROUVRE GUIMAEC           ///
	MAIZIERES SAINT-ANDRE-EN-BRESSE VIDEIX GROZON
local sec1 "A B C Cbis D E Ebis"
local sec2 "A B C D E F G H J"
local sec3 "A B C D E"
local sec4 "A B C D"
local sec5 "A B C D E"
local sec6 "A B C D E"
local sec7 "A"
local sec8 "A B C"
local sec9 "A B C D E F"

local sheet ""
local i = 0
foreach v of local villages {
	local ++i
	foreach sec of local sec`i' {
		local sheet "`sheet' `v'_`sec'"
	}
}
local sheet : list clean sheet

/* Process every cadastre sheet in turn: import it, build the plot-level
   variables, and save the result as a temporary file named after the sheet. */
foreach s of local sheet {

	/* raw sheet: the first two rows are discarded, only columns A-H are used */
	import excel "../../1_raw_data/1_20_cadastre/cadastre.xlsx", sheet("`s'") clear
	drop in 1/2
	keep A-H

	/* plot identifier and number of plots (empty counts are set to 1) */
	rename (A B) (plot_id plots)
	label variable plot_id "Plot identifier [Cadastre]"
	label variable plots "Number of plots [Cadastre]"
	destring plots, replace
	replace plots = 1 if missing(plots)

	/* plot type: 1 for exactly one plot, 2 for more than one, missing otherwise */
	generate plot_type = cond(plots == 1, 1, cond(plots > 1, 2, .)), after(plots)
	label variable plot_type "Type of plot [Cadastre]"
	label define plot_type_lbl 1 "Single" 2 "Multiple"
	label values plot_type plot_type_lbl

	/* land type: the text in column C is mapped to a numeric code; spelling
	   variants are matched exactly as listed (including stray blanks), and any
	   other text leaves the code missing.
	   Codes 7, 8, 9, 11, 13 and 14 are the ones treated as not productive. */
	generate land_type = ., after(C)
	label variable land_type "Type of land [Cadastre]"
	label define land_type_lbl                                               ///
		1 "Arable land"                                                      ///
		2 "Olive trees"                                                      ///
		3 "Vines and olive trees"                                            ///
		4 "Vines"                                                            ///
		5 "Plantations"                                                      ///
		6 "Orchards"                                                         ///
		7 "Pastures"                                                         ///
		8 "Prairies"                                                         ///
		9 "Buildings"                                                        ///
		10 "Gardens"                                                         ///
		11 "Forest"                                                          ///
		12 "Hemp"                                                            ///
		13 "Pond"                                                            ///
		14 "Barren"                                                          ///
		15 "Gardens and orchards"

	replace land_type = 1 if inlist(C, "Terres labourables", "Terres Labourables", "Labour", "labour ")
	replace land_type = 2 if C == "Oliviers"
	replace land_type = 3 if C == "Vignes et oliviers"
	replace land_type = 4 if inlist(C, "Vignes", "Vignes ")
	replace land_type = 5 if C == "Plantations"
	replace land_type = 6 if inlist(C, "Vergers", "verger", "Vergiers", " Chataigneraies", "Cerisaies")
	replace land_type = 7 if inlist(C, "Pâtures", "Patures")
	replace land_type = 8 if C == "Prére"
	replace land_type = 9 if inlist(C, "Batiments et cours", "Batiments & Cours")
	replace land_type = 10 if inlist(C, "Jardins", "jardin", "Jardin ")
	replace land_type = 11 if C == "Bois"
	replace land_type = 12 if inlist(C, "Chenevrières", "Chenevières ", "Chenevières")
	replace land_type = 13 if C == "Maree"
	replace land_type = 14 if inlist(C, "Fruites", "Frichee")
	replace land_type = 15 if C == "Jardins et vergers"

	label values land_type land_type_lbl
	drop C

	/* productive land: 1 unless the land type is one of the non-productive
	   codes; rows with a missing land type therefore count as productive */
	generate land_prod = !inlist(land_type, 7, 8, 9, 11, 13, 14)
	label variable land_prod "Indicator for productive land [Cadastre]"

	/* area components (columns D-F): converted to numbers, blanks set to 0 */
	rename (D E F) (area_arp area_per area_met)
	label variable area_arp "Area (arpents) [Cadastre]"
	label variable area_per "Area (perches) [Cadastre]"
	label variable area_met "Area (metres) [Cadastre]"
	foreach part of varlist area_arp area_per area_met {
		destring `part', replace
		replace `part' = 0 if missing(`part')
	}

	/* area: total, with weights 10000, 100 and 1 on the three components */
	generate area = (10000 * area_arp) + (100 * area_per) + area_met, after(area_met)
	label variable area "Plot area (meters) [Cadastre]"
	drop area_arp area_per area_met

	/* revenue components (columns G-H): converted to numbers, blanks set to 0 */
	rename (G H) (revenues_francs revenues_cents)
	label variable revenues_francs "Revenues (francs) [Cadastre]"
	label variable revenues_cents "Revenues (cents) [Cadastre]"
	foreach part of varlist revenues_francs revenues_cents {
		destring `part', replace
		replace `part' = 0 if missing(`part')
	}

	/* revenues: total, in francs (cents divided by 100) */
	generate revenues = revenues_francs + (revenues_cents / 100), after(revenues_cents)
	label variable revenues "Plot value (francs) [Cadastre]"
	drop revenues_francs revenues_cents

	/* section: keep the sheet name; it is split into village and section later */
	generate section = "`s'", before(plot_id)

	compress
	save "$TEMP/`s'", replace
}

/* stack the per-sheet files, deleting each temporary file once appended */
clear
foreach s of local sheet {
	append using "$TEMP/`s'"
	erase "$TEMP/`s'.dta"
}

/* sheet names have the form VILLAGE_SECTION: split them at the underscore
   into a village name and a section code */
split section, parse("_")
drop section
rename (section1 section2) (villagename section)
order villagename section, first

/* harmonise the sheet spelling with the municipality file */
replace villagename = "ECHEVRONNE" if villagename == "ECHEVERONNE"

/* keep productive land only; the indicator is not needed afterwards */
keep if land_prod != 0
drop land_prod

* COMBINE DATA

/* Attach the municipality-level information to every plot.
   assert(2 3) stops the do-file if any plot has no matching municipality;
   keep(3) discards municipalities without plots. */
* NOTE(review): both datasets contain a variable named area. merge keeps the
* master (plot-level) values when names collide, so the municipality area in
* km2 from the using file does not survive this step - confirm this is intended.
merge m:1 villagename using "$TEMP/cadastre_henry", assert(2 3) keep(3) nogenerate

/* The availability flag is not needed in the plot-level data. It is dropped
   before reordering so that each variable is named only once in the order
   list; the resulting variable order is unchanged. */
drop cadastre_data
order villagenum insee_com cadastre_year villagename partible-density_1793, first

/* treatment variable: 1 when the municipality is coded as impartible or as
   excluding women from inheritance, 0 otherwise */
generate treated = (impart == 1 | fem_excl == 1), before(partible)
label variable treated "Indicator for treated municipality"

/* bundled plots: 1 for the two villages named below, 0 otherwise */
generate bundled_plots = inlist(villagename, "MAIZIERES", "CHAMPIGNY")
label variable bundled_plots "Indicator for cadastre data partially bundled [Cadastre]"

/* attribute mean to bundled plots: each row is replicated once per plot it
   stands for, and its area and revenues are divided by that number so that
   every copy carries the per-plot average */
expand plots
sort villagename plot_id
foreach v of varlist area revenues {
	replace `v' = `v' / plots
}

/* rows that stood for several plots move from code 2 to the new code 3 */
replace plot_type = 3 if plot_type == 2
label define plot_type_lbl 3 "Multiple (unit averaged)", add
label values plot_type plot_type_lbl 

/* area in hectares: plot area divided by 10000 */
generate area_hec = area / 10000, after(area)
label variable area_hec "Plot area (hectares) [Cadastre]"

/* plot bins (absolute size): ten left-closed, right-open size classes */
generate plot_bin_hec = ., after(area_hec)
label define plot_bin_hec_lbl 1 "0-0.1" 
label define plot_bin_hec_lbl 2 "0.1-0.2", add 
label define plot_bin_hec_lbl 3 "0.2-0.3", add 
label define plot_bin_hec_lbl 4 "0.3-0.5", add 
label define plot_bin_hec_lbl 5 "0.5-0.75" , add
label define plot_bin_hec_lbl 6 "0.75-1", add
label define plot_bin_hec_lbl 7 "1-2", add 
label define plot_bin_hec_lbl 8 "2-3", add 
label define plot_bin_hec_lbl 9 "3-5", add 
label define plot_bin_hec_lbl 10 "5+", add 
label values plot_bin_hec plot_bin_hec_lbl
label variable plot_bin_hec "Plot area bin (hectares) [Cadastre]"

replace plot_bin_hec = 1 if (area_hec >= 0 & area_hec < 0.1)
replace plot_bin_hec = 2 if (area_hec >= .1 & area_hec < .2)
replace plot_bin_hec = 3 if (area_hec >= .2 & area_hec < .3)
replace plot_bin_hec = 4 if (area_hec >= .3 & area_hec < .5)
replace plot_bin_hec = 5 if (area_hec >= .5 & area_hec < .75)
replace plot_bin_hec = 6 if (area_hec >= .75 & area_hec < 1)
replace plot_bin_hec = 7 if (area_hec >= 1 & area_hec < 2)
replace plot_bin_hec = 8 if (area_hec >= 2 & area_hec < 3)
replace plot_bin_hec = 9 if (area_hec >= 3 & area_hec < 5)
/* Stata ranks missing values above every number, so the open-ended top bin
   must exclude them explicitly; a missing area keeps a missing bin */
replace plot_bin_hec = 10 if area_hec >= 5 & !missing(area_hec)

/* relative area size: each plot as a percentage of the summed plot area of its
   village. The sum runs over the rows present at this point, that is after the
   non-productive land has been dropped and bundled rows expanded.
   egen total() is the documented name of the function formerly called sum(). */
bysort villagename: egen area_total = total(area)
generate area_sh = 100 * area / area_total, after(area_hec)
label variable area_sh "Plot area (share of municipality area, %) [Cadastre]"
drop area_total area

/* plot bins (relative size): same class limits as the absolute bins, applied
   to the area share expressed per thousand.
   The new variable is positioned after plot_bin_hec, spelled out in full so
   the statement does not depend on variable-name abbreviation. */
generate plot_bin_sh = ., after(plot_bin_hec)
label define plot_bin_sh_lbl 1 "0-0.1" 
label define plot_bin_sh_lbl 2 "0.1-0.2", add 
label define plot_bin_sh_lbl 3 "0.2-0.3", add 
label define plot_bin_sh_lbl 4 "0.3-0.5", add 
label define plot_bin_sh_lbl 5 "0.5-0.75" , add
label define plot_bin_sh_lbl 6 "0.75-1", add
label define plot_bin_sh_lbl 7 "1-2", add 
label define plot_bin_sh_lbl 8 "2-3", add 
label define plot_bin_sh_lbl 9 "3-5", add 
label define plot_bin_sh_lbl 10 "5+", add 
label values plot_bin_sh plot_bin_sh_lbl
label variable plot_bin_sh "Plot area bin (share of municipality area, ‰) [Cadastre]"

/* area_sh is temporarily rescaled from percent to per thousand for the
   binning and scaled back afterwards */
* NOTE(review): rescaling a float in place and back may perturb its last
* digits; kept as is so that the stored values are reproduced unchanged.
replace area_sh = area_sh * 10 /* perthousand */
replace plot_bin_sh = 1 if (area_sh >= 0 & area_sh < 0.1)
replace plot_bin_sh = 2 if (area_sh >= .1 & area_sh < .2)
replace plot_bin_sh = 3 if (area_sh >= .2 & area_sh < .3)
replace plot_bin_sh = 4 if (area_sh >= .3 & area_sh < .5)
replace plot_bin_sh = 5 if (area_sh >= .5 & area_sh < .75)
replace plot_bin_sh = 6 if (area_sh >= .75 & area_sh < 1)
replace plot_bin_sh = 7 if (area_sh >= 1 & area_sh < 2)
replace plot_bin_sh = 8 if (area_sh >= 2 & area_sh < 3)
replace plot_bin_sh = 9 if (area_sh >= 3 & area_sh < 5)
/* Stata ranks missing values above every number, so the open-ended top bin
   must exclude them explicitly; a missing share keeps a missing bin */
replace plot_bin_sh = 10 if area_sh >= 5 & !missing(area_sh)
replace area_sh = area_sh / 10 /* percent */

/* labels for the two variables created from the sheet names */
label variable section "Village section [Cadastre]"
label variable villagename "Henry village name [Séguy 2001]"

/* the temporary municipality file is no longer needed after the merge */
erase "$TEMP/cadastre_henry.dta"

/* save dataset */
compress
save "../../3_outputs/3_1_datasets/cadastre.dta", replace
* ==============================================================================

/* stop timer 1 and report the elapsed time */
timer off 1 /* 10 seconds */
timer list

